------------------------------------------------------------------------------------------------------------------------------------
      name:  plog_524
       log:  /accounts/projects/jr_ra/GRscarring/erratum/programs/prepare/collapse_bigcps.log
  log type:  text
 opened on:  27 Nov 2024, 16:12:25

. ********************************************************************************
. 
. * collapse_bigcps.do
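. * Collapses the big CPS extract (years 1979-2019 in this run) by cohort and other dimensions.
. * Extracts made (output file suffix in parentheses):
. *       1) year-cohort level (yc)
. *       2) year-cohort-state (current state) (ycs)
. *       3) year-cohort-attainment, with 5-, 2-, and 4-category education (yca5, yca2, yca4)
. *       4) year-cohort-attainment-state, with 5-, 2-, and 4-category education (yca5s, yca2s, yca4s)
. *       5) year-cohort-attainment (2-category)-sex-state (yca2ss)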
. *
. * Edited by JR, 8/4/17: Modify to get age-22 UE rate, even for cohorts not seen at 22
. *                       As part of this, rearrange program flow.
. *  JR, 9/22/17: Eliminate "cpsnewvariables.do" -- merge extractcps and findpartners here.
. *  JR, 4/10/18: Comment out merge to UR and pop -- do this in combinecollapse instead.
. *  RY, 4/25/18: Revised the collapse so that it has two education groups 
. *  JR, 4/30/18: Add 2- and 4-category education collapses
. 
. cap project, doinfo

. if _rc==0 {
.          local pdir "`r(pdir)'"                                                     // the project's main dir.
.          local dofile "`r(dofile)'"                                                 // do-file's stub name
.    local sig {bind:{hi:[`dofile'.dta. RP : `dofile'.do, `c(current_date)']}}    // a signature in notes
.    local doasproject=1
. }

. else {
.         local pdir "~/GRscarring"
.         local dofile "collapse_bigcps"
.    local doasproject=0
. }

. 
. set more off

. local rootdir "`pdir'"

. local thisdir "`pdir'"

. 
. 
. local prepdata "`pdir'/scratch"

. local rawdata "`pdir'/rawdata"

. 
. 
. ***************************************************************************************************************
. *************************************
. ********** 0: LOAD DATA *************
. *************************************
. 
. *** CPS DATA ***
. 
. if `doasproject'==1 {
.   project, uses("`prepdata'/extractcps.dta.gz")
project GRscar_erratum > do-file uses: "/scratch/public/jr_ra/GRscarring2024/erratum/scratch/extractcps.dta.gz" filesig(1117482818:1
> 425161591)
.   project, uses("`prepdata'/findpartners.dta.gz")
project GRscar_erratum > do-file uses: "/scratch/public/jr_ra/GRscarring2024/erratum/scratch/findpartners.dta.gz" filesig(307079394:
> 268425751)
. }

. 
.  * Unzip extractcps and findpartners to temporary .dta files (removed again after the merge)
.   !zcat `prepdata'/extractcps.dta.gz > `prepdata'/extractcps.dta 


.   !zcat `prepdata'/findpartners.dta.gz > `prepdata'/findpartners.dta 


. 
.   use `prepdata'/extractcps.dta

.   *Merge in lives-with-partner
.   qui merge m:1 hh_id yearmo linenum hh_num hh_tiebreak p_tiebreak using `prepdata'/findpartners, gen(mrg2partner)

.   tab mrg2partner, m

   Matching result from |
                  merge |      Freq.     Percent        Cum.
------------------------+-----------------------------------
            Matched (3) | 52,432,999      100.00      100.00
------------------------+-----------------------------------
                  Total | 52,432,999      100.00

.   tab year mrg2partner, m

           |  Matching
           |   result
           | from merge
      year | Matched ( |     Total
-----------+-----------+----------
      1979 | 1,314,693 | 1,314,693 
      1980 | 1,546,827 | 1,546,827 
      1981 | 1,456,261 | 1,456,261 
      1982 | 1,404,030 | 1,404,030 
      1983 | 1,394,390 | 1,394,390 
      1984 | 1,374,456 | 1,374,456 
      1985 | 1,375,168 | 1,375,168 
      1986 | 1,353,360 | 1,353,360 
      1987 | 1,348,594 | 1,348,594 
      1988 | 1,286,480 | 1,286,480 
      1989 | 1,308,515 | 1,308,515 
      1990 | 1,362,597 | 1,362,597 
      1991 | 1,348,250 | 1,348,250 
      1992 | 1,327,365 | 1,327,365 
      1993 | 1,309,210 | 1,309,210 
      1994 | 1,271,349 | 1,271,349 
      1995 | 1,251,927 | 1,251,927 
      1996 | 1,108,899 | 1,108,899 
      1997 | 1,114,450 | 1,114,450 
      1998 | 1,116,813 | 1,116,813 
      1999 | 1,123,666 | 1,123,666 
      2000 | 1,120,585 | 1,120,585 
      2001 | 1,201,906 | 1,201,906 
      2002 | 1,312,305 | 1,312,305 
      2003 | 1,302,483 | 1,302,483 
      2004 | 1,283,683 | 1,283,683 
      2005 | 1,279,052 | 1,279,052 
      2006 | 1,271,693 | 1,271,693 
      2007 | 1,260,380 | 1,260,380 
      2008 | 1,257,619 | 1,257,619 
      2009 | 1,273,634 | 1,273,634 
      2010 | 1,277,199 | 1,277,199 
      2011 | 1,265,607 | 1,265,607 
      2012 | 1,258,730 | 1,258,730 
      2013 | 1,253,663 | 1,253,663 
      2014 | 1,261,811 | 1,261,811 
      2015 | 1,245,862 | 1,245,862 
      2016 | 1,244,166 | 1,244,166 
      2017 | 1,227,127 | 1,227,127 
      2018 | 1,188,950 | 1,188,950 
      2019 | 1,149,244 | 1,149,244 
-----------+-----------+----------
     Total |52,432,999 |52,432,999 

.   drop if mrg2partner==2
(0 observations deleted)

.   drop mrg2partner

. 
.   !rm `prepdata'/extractcps.dta


.   !rm `prepdata'/findpartners.dta


. 
. 
. 
. *** COHORT: ****
. gen cohort=(year-age)

. 
. 
. ************************************************
. *********** 1: SAMPLE RESTRICTIONS *************
. ************************************************
. 
. keep if age>15 & age<81
(1,258,825 observations deleted)

. 
. 
. ************************************************
. *********** 2: MAKE SOME VARIABLES *************
. ************************************************
. 
. *** Variables to collapse by:
. 
. *  Weekly hours, w/ zeros
. replace hourslw=0 if hourslw==. & pemlr~=-1
(20,507,587 real changes made)

. *  Weekly hours, w/o zeros 
. gen hourslw_pos=hourslw if hourslw>0 & hourslw<.
(20,667,082 missing values generated)

. 
. gen byte ed_hs=(educ5==2) if educ5<.
(1 missing value generated)

. gen byte ed_scol=(educ5==3) if educ5<.
(1 missing value generated)

. gen byte ed_ba=(educ5==4) if educ5<.
(1 missing value generated)

. gen byte ed_grad=(educ5==5) if educ5<.
(1 missing value generated)

. 
. 
. *gen byte ed_scol_less=(educ5==1 | educ5==2 | educ5==3) if educ5<.
. *gen byte ed_ba_more=(educ5==4 | educ5==5) if educ5<.
. gen byte educ2=(inlist(educ5, 4, 5)) if educ5<.
(1 missing value generated)

. *recode educ5 (1=1) (2=2) (3=3) (4 5=4), gen(educ4)
. 
. *  Mean education of occupation, conditional on employment (use pre-recession base period)
. *bys occ1_2003: egen occ_mn_ed_yrs_tmp=mean(ed_yrs) if inrange(year, 1998, 2007)
. *bys occ1_2003: egen occ_mn_ed_yrs=mode(occ_mn_ed_yrs_tmp)
. 
. *  Mean earnings of occupation, conditional on employment (use pre-recession base period)
. *bys occ: egen occ_mn_tot_ern_r_tmp=mean(tot_ern_r) if inrange(year, 1998, 2007)
. *bys occ: egen occ_mn_tot_ern_r=mode(occ_mn_tot_ern_r_tmp)
. 
. rename stfips fipsst

. sort year cohort fipsst educ5

. tempfile all 

. save `all'
file /tmp/St2868349.000004 saved as .dta format

. 
. 
. 
. *************************************
. *********** 3: COLLAPSE *************
. *************************************
. local mainvars "labfor empl unem married howner hourslw hourslw_pos uhours livewithprnt chld_pr educ_occup lives_spouse_oth"

. ** 1.1: Year-cohort
. collapse (mean) `mainvars' sex educ_yr ed_hs ed_scol ed_ba ed_grad ///
>                  (count) n_obs=wgt_composite (rawsum) wgt_composite [aw=wgt_composite], ///
>                  by(year cohort)
(note: aweights not used to compute counts)

. tempfile yc

. save `yc'
file /tmp/St2868349.000005 saved as .dta format

. 
. ** 1.2: Year-cohort-state
. use `all', clear

. collapse (mean) `mainvars' educ_yr ed_hs ed_scol ed_ba ed_grad ///
>                  (count) n_obs=wgt_composite (rawsum) wgt_composite [aw=wgt_composite], ///
>                  by(year cohort fipsst)
(note: aweights not used to compute counts)

. tempfile ycs

. save `ycs'
file /tmp/St2868349.000006 saved as .dta format

. 
. ** 1.3: Year-cohort-attainment (5 category)
. use `all', clear

. collapse (mean) `mainvars' educ_yr ///
>                  (count) n_obs=wgt_composite (rawsum) wgt_composite [aw=wgt_composite], ///
>                  by(year cohort educ5)
(note: aweights not used to compute counts)

. tempfile yca5

. save `yca5'
file /tmp/St2868349.000007 saved as .dta format

. 
. ** 1.4: Year-cohort-attainment (5 category)-state
. use `all', clear

. collapse (mean) `mainvars' educ_yr ///
>                  (count) n_obs=wgt_composite (rawsum) wgt_composite [aw=wgt_composite], ///
>                  by(year cohort fipsst educ5)
(note: aweights not used to compute counts)

. tempfile yca5s

. save `yca5s'
file /tmp/St2868349.000008 saved as .dta format

. 
. ** 1.5: Year-cohort-attainment (2 category)
. use `all', clear

. collapse (mean) `mainvars' educ_yr ed_hs ed_scol ed_ba ed_grad ///
>                  (count) n_obs=wgt_composite (rawsum) wgt_composite [aw=wgt_composite], ///
>                  by(year cohort educ2)
(note: aweights not used to compute counts)

. tempfile yca2

. save `yca2'
file /tmp/St2868349.000009 saved as .dta format

. 
. ** 1.6: Year-cohort-attainment (2 category)-state
. use `all', clear

. collapse (mean) `mainvars' educ_yr ed_hs ed_scol ed_ba ed_grad ///
>                  (count) n_obs=wgt_composite (rawsum) wgt_composite [aw=wgt_composite], ///
>                  by(year cohort fipsst educ2)
(note: aweights not used to compute counts)

. tempfile yca2s

. save `yca2s'
file /tmp/St2868349.00000a saved as .dta format

. 
. ** 1.7: Year-cohort-attainment (2 category)-sex-state
. use `all', clear

. collapse (mean) `mainvars' educ_yr ed_grad ///
>                  (count) n_obs=wgt_composite (rawsum) wgt_composite [aw=wgt_composite], ///
>                  by(year cohort fipsst educ2 sex)
(note: aweights not used to compute counts)

. tempfile yca2ss 

. save `yca2ss'
file /tmp/St2868349.00000b saved as .dta format

. 
. ** 1.8: Year-cohort-attainment (4 category)
. use `all', clear

. collapse (mean) `mainvars' educ_yr ed_grad ///
>                  (count) n_obs=wgt_composite (rawsum) wgt_composite [aw=wgt_composite], ///
>                  by(year cohort educ4)
(note: aweights not used to compute counts)

. tempfile yca4

. save `yca4'
file /tmp/St2868349.00000c saved as .dta format

. 
. ** 1.9: Year-cohort-attainment (4 category)-state
. use `all', clear

. collapse (mean) `mainvars' educ_yr ed_grad ///
>                  (count) n_obs=wgt_composite (rawsum) wgt_composite [aw=wgt_composite], ///
>                  by(year cohort fipsst educ4)
(note: aweights not used to compute counts)

. tempfile yca4s

. save `yca4s'
file /tmp/St2868349.00000d saved as .dta format

. 
. 
. ********************************************
. **** 5.  LABEL VARIABLES *****
. ********************************************
.   foreach set in yc ycs yca5 yca5s yca2 yca2s yca2ss yca4 yca4s {
  2.     use ``set''
  3.     label var n_obs "Number of observations in cell"
  4.     label var wgt_composite "Sum of basic CPS weights (unweighted/raw)" 
  5.     label var labfor "In labor force (current status)"
  6.     label var empl "Employed (current status)"                                                  
  7.     label var unem "Unemployed (current status)"                
  8.     label var married "Married"
  9.     label var hourslw "Hours worked last week"
 10.     label var hourslw_pos "Hours worked last week | hours>0"
 11.     cap  label var ed_hs "Educ: HS grad"
 12.     cap  label var ed_scol "Educ: Some coll"
 13.     cap  label var ed_ba "Educ: Bach degree"
 14.     cap  label var ed_grad "Educ: >Bach degree"*/
 15.     cap  label var ed_scol_less "Educ: Some coll or <Some coll"
 16.     cap  label var ed_ba_more "Educ: Bach degree or >BA"
 17.     cap  label var ed_yrs "Educ: years"
 18.     save ``set'', replace
 19.   }
file /tmp/St2868349.000005 saved as .dta format
file /tmp/St2868349.000006 saved as .dta format
file /tmp/St2868349.000007 saved as .dta format
file /tmp/St2868349.000008 saved as .dta format
file /tmp/St2868349.000009 saved as .dta format
file /tmp/St2868349.00000a saved as .dta format
file /tmp/St2868349.00000b saved as .dta format
file /tmp/St2868349.00000c saved as .dta format
file /tmp/St2868349.00000d saved as .dta format

. 
. 
. *************************************
. ****** 6: COMPRESS AND SAVE *********
. *************************************
. 
. 
. foreach col in yc ycs yca5 yca5s yca2 yca2s yca2ss yca4 yca4s {
  2.         use ``col'', clear
  3.         save "`prepdata'/`dofile'_`col'.dta", replace
  4.         *! gzip -f `prepdata'/`dofile'_`col'.dta
.         *project, creates("`prepdata'/`dofile'_`col'.dta.gz")
.         if `doasproject'==1 project, creates("`prepdata'/`dofile'_`col'.dta")
  5. }
(file /accounts/projects/jr_ra/GRscarring/erratum/scratch/collapse_bigcps_yc.dta not found)
file /accounts/projects/jr_ra/GRscarring/erratum/scratch/collapse_bigcps_yc.dta saved
project GRscar_erratum > do-file creates: "/scratch/public/jr_ra/GRscarring2024/erratum/scratch/collapse_bigcps_yc.dta" filesig(2559
> 044711:462779)
(file /accounts/projects/jr_ra/GRscarring/erratum/scratch/collapse_bigcps_ycs.dta not found)
file /accounts/projects/jr_ra/GRscarring/erratum/scratch/collapse_bigcps_ycs.dta saved
project GRscar_erratum > do-file creates: "/scratch/public/jr_ra/GRscarring2024/erratum/scratch/collapse_bigcps_ycs.dta" filesig(180
> 451784:21632157)
(file /accounts/projects/jr_ra/GRscarring/erratum/scratch/collapse_bigcps_yca5.dta not found)
file /accounts/projects/jr_ra/GRscarring/erratum/scratch/collapse_bigcps_yca5.dta saved
project GRscar_erratum > do-file creates: "/scratch/public/jr_ra/GRscarring2024/erratum/scratch/collapse_bigcps_yca5.dta" filesig(34
> 05362549:1698773)
(file /accounts/projects/jr_ra/GRscarring/erratum/scratch/collapse_bigcps_yca5s.dta not found)
file /accounts/projects/jr_ra/GRscarring/erratum/scratch/collapse_bigcps_yca5s.dta saved
project GRscar_erratum > do-file creates: "/scratch/public/jr_ra/GRscarring2024/erratum/scratch/collapse_bigcps_yca5s.dta" filesig(4
> 075547868:83545866)
(file /accounts/projects/jr_ra/GRscarring/erratum/scratch/collapse_bigcps_yca2.dta not found)
file /accounts/projects/jr_ra/GRscarring/erratum/scratch/collapse_bigcps_yca2.dta saved
project GRscar_erratum > do-file creates: "/scratch/public/jr_ra/GRscarring2024/erratum/scratch/collapse_bigcps_yca2.dta" filesig(13
> 55778008:862453)
(file /accounts/projects/jr_ra/GRscarring/erratum/scratch/collapse_bigcps_yca2s.dta not found)
file /accounts/projects/jr_ra/GRscarring/erratum/scratch/collapse_bigcps_yca2s.dta saved
project GRscar_erratum > do-file creates: "/scratch/public/jr_ra/GRscarring2024/erratum/scratch/collapse_bigcps_yca2s.dta" filesig(3
> 604218226:42238632)
(file /accounts/projects/jr_ra/GRscarring/erratum/scratch/collapse_bigcps_yca2ss.dta not found)
file /accounts/projects/jr_ra/GRscarring/erratum/scratch/collapse_bigcps_yca2ss.dta saved
project GRscar_erratum > do-file creates: "/scratch/public/jr_ra/GRscarring2024/erratum/scratch/collapse_bigcps_yca2ss.dta" filesig(
> 1707442527:71548641)
(file /accounts/projects/jr_ra/GRscarring/erratum/scratch/collapse_bigcps_yca4.dta not found)
file /accounts/projects/jr_ra/GRscarring/erratum/scratch/collapse_bigcps_yca4.dta saved
project GRscar_erratum > do-file creates: "/scratch/public/jr_ra/GRscarring2024/erratum/scratch/collapse_bigcps_yca4.dta" filesig(22
> 78471126:1453194)
(file /accounts/projects/jr_ra/GRscarring/erratum/scratch/collapse_bigcps_yca4s.dta not found)
file /accounts/projects/jr_ra/GRscarring/erratum/scratch/collapse_bigcps_yca4s.dta saved
project GRscar_erratum > do-file creates: "/scratch/public/jr_ra/GRscarring2024/erratum/scratch/collapse_bigcps_yca4s.dta" filesig(1
> 902591548:72586127)

. 
. 
. * end of do file *
. 
end of do-file
      name:  plog_524
       log:  /accounts/projects/jr_ra/GRscarring/erratum/programs/prepare/collapse_bigcps.log
  log type:  text
 closed on:  27 Nov 2024, 16:26:17
------------------------------------------------------------------------------------------------------------------------------------
